1 Pre-processing

1.1 Load packages

library("RColorBrewer")
library(Signac)
library(Seurat)
library(GenomicRanges)
library(future)
#library(SeuratWrappers)
library(harmony)
library(EnsDb.Hsapiens.v86)
library(stringr)
library(dplyr)
library(ggplot2)
library(patchwork)
library(kableExtra)
library(tidyverse)

set.seed(173)

1.2 Parameters

# Paths
path_to_obj <- here::here("~/Documents/multiome_tonsil_Lucia/results/R_objects/11.tonsil_multiome_integrated_without_doublets_normalized.rds")

path_to_markers<-here::here("~/Documents/multiome_tonsil_Lucia/results/tables/tonsil_markers_no_doublets_05.csv")

# Thresholds
max_doublet_score_rna <- 0.3

1.3 Load data

tonsil_wnn_without_doublet <- readRDS(path_to_obj)

tonsil_markers_05<-read_csv(path_to_markers)
## New names:
## * `` -> ...1
## Rows: 8409 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): ...1, gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

1.3.1 Get top n markers of each cluster

Resolution 0.05

tonsil_markers_05 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) %>% write.csv(.,file=paste0("~/Documents/multiome_tonsil_Lucia/results/tables/", "top10_tonsil_markers_no_doublets_05.csv"))

tonsil_markers_05 %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC) %>% write.csv(.,file=paste0("~/Documents/multiome_tonsil_Lucia/results/tables/", "top5_tonsil_markers_no_doublets_05.csv"))


top5_tonsil_markers_05<-tonsil_markers_05 %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
top7_tonsil_markers_05<-tonsil_markers_05 %>% group_by(cluster) %>% top_n(n = 7, wt = avg_log2FC)

top10_tonsil_markers_05<-tonsil_markers_05 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
df_top5<-as.data.frame(top5_tonsil_markers_05)
kbl(df_top5,caption = "Table of the top 5 marker of each cluster resolution 0.005") %>%
  kable_paper("striped", full_width = F)
Table 1: Table of the top 5 marker of each cluster resolution 0.005
…1 p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
BANK1 0 3.107693 0.993 0.428 0 0 BANK1
COL19A1 0 2.508477 0.619 0.107 0 0 COL19A1
ARHGAP24 0 2.262911 0.951 0.456 0 0 ARHGAP24
ADAM28 0 2.214714 0.815 0.338 0 0 ADAM28
MARCH1 0 2.113625 0.876 0.384 0 0 MARCH1
INPP4B 0 3.210898 0.909 0.120 0 1 INPP4B
FYB1 0 3.085975 0.933 0.131 0 1 FYB1
LEF1 0 2.824967 0.622 0.067 0 1 LEF1
IL6ST 0 2.674320 0.736 0.152 0 1 IL6ST
IL7R 0 2.663301 0.715 0.081 0 1 IL7R
HMGB2 0 2.958610 0.932 0.151 0 2 HMGB2
TUBA1B 0 2.887534 0.949 0.224 0 2 TUBA1B
H2AFZ 0 2.613753 0.947 0.252 0 2 H2AFZ
TOP2A 0 2.497528 0.796 0.021 0 2 TOP2A
STMN1 0 2.462446 0.917 0.086 0 2 STMN1
AC023590.11 0 2.944159 0.984 0.213 0 3 AC023590.1
MAML31 0 2.785879 0.834 0.216 0 3 MAML3
RAPGEF51 0 2.620607 0.930 0.163 0 3 RAPGEF5
AC104170.11 0 2.606568 0.820 0.110 0 3 AC104170.1
AFF21 0 2.377730 0.926 0.186 0 3 AFF2
CCL5 0 3.634883 0.742 0.026 0 4 CCL5
AOAH1 0 3.252688 0.791 0.158 0 4 AOAH
GNLY 0 2.901600 0.189 0.004 0 4 GNLY
NKG7 0 2.662984 0.559 0.011 0 4 NKG7
DTHD11 0 2.536666 0.515 0.044 0 4 DTHD1
IGHG1 0 5.974981 0.648 0.243 0 5 IGHG1
IGLC1 0 6.002027 0.835 0.415 0 5 IGLC1
IGKC 0 6.156868 0.964 0.892 0 5 IGKC
IGHA1 0 6.496845 0.678 0.408 0 5 IGHA1
IGLC2 0 6.068831 0.901 0.718 0 5 IGLC2
SLC8A11 0 4.322800 0.764 0.041 0 6 SLC8A1
LYZ 0 3.986308 0.714 0.008 0 6 LYZ
PLXDC2 0 3.382540 0.721 0.005 0 6 PLXDC2
S100A9 0 3.338779 0.282 0.020 0 6 S100A9
SPRR3 0 3.476821 0.173 0.014 0 6 SPRR3
LINC013741 0 4.400369 0.976 0.059 0 7 LINC01374
LINC01478 0 3.957180 0.920 0.008 0 7 LINC01478
FAM160A11 0 3.698304 0.928 0.010 0 7 FAM160A1
RUNX23 0 3.453543 0.992 0.186 0 7 RUNX2
TCF41 0 3.753866 0.992 0.534 0 7 TCF4
df_top7<-as.data.frame(top7_tonsil_markers_05)
df_mark<-as.data.frame(tonsil_markers_05)
kbl(df_top7,caption = "Table of the top 5 marker of each cluster resolution 0.005") %>%
  kable_paper("striped", full_width = F)
Table 2: Table of the top 5 marker of each cluster resolution 0.005
…1 p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
BANK1 0 3.107693 0.993 0.428 0 0 BANK1
COL19A1 0 2.508477 0.619 0.107 0 0 COL19A1
ARHGAP24 0 2.262911 0.951 0.456 0 0 ARHGAP24
ADAM28 0 2.214714 0.815 0.338 0 0 ADAM28
MARCH1 0 2.113625 0.876 0.384 0 0 MARCH1
AC120193.1 0 2.109600 0.755 0.278 0 0 AC120193.1
ZDHHC14 0 1.951733 0.589 0.227 0 0 ZDHHC14
INPP4B 0 3.210898 0.909 0.120 0 1 INPP4B
FYB1 0 3.085975 0.933 0.131 0 1 FYB1
LEF1 0 2.824967 0.622 0.067 0 1 LEF1
IL6ST 0 2.674320 0.736 0.152 0 1 IL6ST
IL7R 0 2.663301 0.715 0.081 0 1 IL7R
ST8SIA1 0 2.637194 0.567 0.071 0 1 ST8SIA1
BCL11B 0 2.632781 0.826 0.086 0 1 BCL11B
HMGB2 0 2.958610 0.932 0.151 0 2 HMGB2
TUBA1B 0 2.887534 0.949 0.224 0 2 TUBA1B
H2AFZ 0 2.613753 0.947 0.252 0 2 H2AFZ
TOP2A 0 2.497528 0.796 0.021 0 2 TOP2A
STMN1 0 2.462446 0.917 0.086 0 2 STMN1
HIST1H4C 0 2.409732 0.844 0.331 0 2 HIST1H4C
TUBB 0 2.306446 0.927 0.173 0 2 TUBB
AC023590.11 0 2.944159 0.984 0.213 0 3 AC023590.1
MAML31 0 2.785879 0.834 0.216 0 3 MAML3
RAPGEF51 0 2.620607 0.930 0.163 0 3 RAPGEF5
AC104170.11 0 2.606568 0.820 0.110 0 3 AC104170.1
AFF21 0 2.377730 0.926 0.186 0 3 AFF2
LHFPL21 0 2.343496 0.795 0.179 0 3 LHFPL2
MYO1E1 0 2.300488 0.963 0.335 0 3 MYO1E
CCL5 0 3.634883 0.742 0.026 0 4 CCL5
AOAH1 0 3.252688 0.791 0.158 0 4 AOAH
GNLY 0 2.901600 0.189 0.004 0 4 GNLY
NKG7 0 2.662984 0.559 0.011 0 4 NKG7
DTHD11 0 2.536666 0.515 0.044 0 4 DTHD1
PLCB11 0 2.494851 0.476 0.048 0 4 PLCB1
GZMK 0 2.480087 0.537 0.011 0 4 GZMK
IGHG1 0 5.974981 0.648 0.243 0 5 IGHG1
IGHGP 0 5.913659 0.501 0.128 0 5 IGHGP
IGHG3 0 5.894529 0.705 0.272 0 5 IGHG3
IGLC1 0 6.002027 0.835 0.415 0 5 IGLC1
IGKC 0 6.156868 0.964 0.892 0 5 IGKC
IGHA1 0 6.496845 0.678 0.408 0 5 IGHA1
IGLC2 0 6.068831 0.901 0.718 0 5 IGLC2
SLC8A11 0 4.322800 0.764 0.041 0 6 SLC8A1
LYZ 0 3.986308 0.714 0.008 0 6 LYZ
PLXDC2 0 3.382540 0.721 0.005 0 6 PLXDC2
S100A9 0 3.338779 0.282 0.020 0 6 S100A9
LCN2 0 3.326250 0.167 0.009 0 6 LCN2
LRMDA 0 3.205879 0.718 0.007 0 6 LRMDA
SPRR3 0 3.476821 0.173 0.014 0 6 SPRR3
LINC013741 0 4.400369 0.976 0.059 0 7 LINC01374
LINC01478 0 3.957180 0.920 0.008 0 7 LINC01478
FAM160A11 0 3.698304 0.928 0.010 0 7 FAM160A1
RUNX23 0 3.453543 0.992 0.186 0 7 RUNX2
P2RY141 0 3.366014 0.952 0.068 0 7 P2RY14
ZFAT2 0 3.361999 0.948 0.242 0 7 ZFAT
TCF41 0 3.753866 0.992 0.534 0 7 TCF4
#install.packages("htmlwidgets", type = "binary")
#install.packages("DT", type = "binary")

DT::datatable(df_top7)
DT::datatable(df_mark)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
markerGenes <- unique(tonsil_markers_05$gene)

geneSym <- ifelse(test = !grepl('NA', markerGenes), 
             yes = sub('.*?-', '', markerGenes),
             no = sub('-.*', '', markerGenes))


dot.10 <- DotPlot(tonsil_wnn_without_doublet, features = unique(top10_tonsil_markers_05$gene),cols = 'RdBu', cluster.idents = T) + theme(axis.text.x = element_text( size = 10, vjust = 0.8, hjust = 0.8)) + scale_x_discrete(labels= geneSym)+ggtitle("res 0.05 top 10 of each cluster")

dot.5 <- DotPlot(tonsil_wnn_without_doublet, features = unique(top5_tonsil_markers_05$gene),cols = 'RdBu', cluster.idents = T) + theme(axis.text.x = element_text( size = 10, vjust = 0.8, hjust = 0.8)) + scale_x_discrete(labels= geneSym)+ggtitle("res 0.05 top 5 of each cluster")
dot.10 +
  coord_flip() +
  theme(axis.title = element_blank(), axis.text.y = element_text(size = 5))

dot.5 +
  coord_flip() +
  theme(axis.title = element_blank(), axis.text.y = element_text(size = 7))

top7mark_cluster0<-top7_tonsil_markers_05[["gene"]][1:7]
top7mark_cluster1<-top7_tonsil_markers_05[["gene"]][8:14]
top7mark_cluster2<-top7_tonsil_markers_05[["gene"]][15:21]
top7mark_cluster3<-top7_tonsil_markers_05[["gene"]][22:28]
top7mark_cluster4<-top7_tonsil_markers_05[["gene"]][29:35]
top7mark_cluster5<-top7_tonsil_markers_05[["gene"]][36:42]
top7mark_cluster6<-top7_tonsil_markers_05[["gene"]][43:49]
top7mark_cluster7<-top7_tonsil_markers_05[["gene"]][50:56]

2 Markers exploration

markers_gg <- function(x){purrr::map(x, function(x) {
  
  p <- FeaturePlot(
    tonsil_wnn_without_doublet,
    features = x,
    reduction = "wnn.umap",
    pt.size = 0.1
  )
  p
})}
m<-c("PRDM1","XBP1","IRF4","MEF2B","BCL6")

DZ<-c("SUGCT", "CXCR4", "AICDA")

LZ<- c("CD83","BCL2A1")

GC<- c("MEF2B", "BCL6","IRF4")

PC<- c("PRDM1","SLAMF7", "MZB1", "FKBP11")

2.1 cluster 0; memory and naive b cells

markers_gg(top7mark_cluster0)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

naive_markers<-c("CD79A", "CD79B", "BLNK")
memory_markers<-c("CD27")
markers_gg(naive_markers)
## [[1]]

## 
## [[2]]

## 
## [[3]]

markers_gg(memory_markers)
## [[1]]

markers_gg(c("MS4A1","NT5E"))
## [[1]]

## 
## [[2]]

2.2 cluster 1: naive CD4 T-celL

IL6ST: naive CD4 T-cel

CCR7, CD62L, and CD45RA

cd4<- c("CCR7")
markers_gg(top7mark_cluster1)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

markers_gg(cd4)
## [[1]]

2.3 cluster 2: GC, DZ

markers_gg(top7mark_cluster2)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

markers_gg(GC)
## [[1]]

## 
## [[2]]

## 
## [[3]]

markers_gg(DZ)
## [[1]]

## 
## [[2]]

## 
## [[3]]

2.4 cluster 3: GC, LZ

markers_gg(top7mark_cluster3)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

markers_gg(LZ)
## [[1]]

## 
## [[2]]

2.5 cluster 4: NK

ccl5:cd8t cell, nk

AOAH: NK GNLY:NK

markers_gg(top7mark_cluster4)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

markers_gg("KIR2DL4")
## [[1]]

2.6 cluster 5: PC

markers_gg(top7mark_cluster5)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

markers_gg( "MYO1E")
## [[1]]

markers_gg(PC)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

2.7 cluster 6: monocites

markers_gg(top7mark_cluster6)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

monocytes_markers<-c("LYZ","S100A8")
markers_gg(monocytes_markers)
## [[1]]

## 
## [[2]]

2.8 cluster 7

markers_gg(top7mark_cluster7)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

3 Number of cell in each cluster

DimPlot(tonsil_wnn_without_doublet, reduction = "wnn.umap", label = TRUE, pt.size = 0.5) 

cell.num <- table(Idents(tonsil_wnn_without_doublet))
cell.num
## 
##     0     1     2     3     4     5     6     7 
## 27168 17606  8469  7281  2656  2218   742   250

4 Rename clusters

cell.num <- table(Idents(tonsil_wnn_without_doublet))
cell.num
## 
##     0     1     2     3     4     5     6     7 
## 27168 17606  8469  7281  2656  2218   742   250
new.cluster.ids <- c("Naive/MBC", "Naive CD4 T-celL","GC/DZ", "GC/LZ", "NK T-cell", "PC", "Monocytes","NI")
names(new.cluster.ids) <- levels(tonsil_wnn_without_doublet)
tonsil_wnn_without_doublet <- RenameIdents(tonsil_wnn_without_doublet, new.cluster.ids)
DimPlot(tonsil_wnn_without_doublet, reduction = "wnn.umap", label = TRUE, pt.size = 0.5) 

5 Bibliography markers

MARKERS

Immature B cells express CD19, CD 20, CD34, CD38, and CD45R, T-cell receptor/CD3 complex (TCR/CD3 complex)

  • T-cells (identified by high expression of CD3D and CD3E).
  • monocytes (identified by high expression of LYZ and S100A8).
  • naive B-cells (identified by high expression of CD79A, CD79B and BLNK).
  • plasma cells (identified by B-cell and proliferation markers, such as TOP2A or MKI67).
  • poor-quality cells (identified by high mitochondrial expression). If a cell has pores in the membrane due to cell lysis, the cytosolic mRNA will leak out of the cell; but if the diameter of mitochondria is higher than the pores, they will get trapped inside the cell.

DZ: SUGCT, CXCR4, AICDA

LZ: CD83, BCL2A1

GC total: MEF2B, BCL6, IRF4

PC: PRDM1, SLAMF7, MZB1, FKBP11

canonical_bcell_markers <-c("CD34", "CD38", "CD19")

monocytes_markers<-c("LYZ","S100A8")

naive_markers<-c("CD79A", "CD79B", "BLNK")

bib_Bcell_markers<-c("CD19","CR2","MS4A1","RALGPS2","CD79A")
bib_Tcell_markers<-c("CD3E","CD4","CD8A","FOXP3","IL17A")

markers_bcell<-c("BANK1","ARHGAP24","ADAM28","MARCH1","RAPGEF5","AFF2","RGS13","LPP","IGHG1","IGLC1","SLC8A1","LYZ","PLXDC2","FAM160A1","IGHA1","IGLC2", "SETBP1","ENTPD1","COL19A1","CCSER1")

markers_tcell<-c("INPP4B","FYB1","LEF1","IL7R","IL6ST","CCL5","GNLY","NKG7","DTHD1","RUNX2", "FOXP3","CD8A","IL17A","CD2")

5.1 B cells

markers_gg(naive_markers)
## [[1]]

## 
## [[2]]

## 
## [[3]]

markers_gg(bib_Bcell_markers)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

5.2 T-cells

CD8+ T cell markers:“CD3D”, “CD8A” NK cell markers:“GNLY”, “NKG7”

markers_gg(bib_Tcell_markers)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

5.3 Pauli markers

naive_mem_bcell<-c("BANK1", "FCER2")
cd4_tcell<-c("CD3D", "IL7R")
dz_gc_bcell<-c("MKI67", "TOP2A")
lz_gc_bcell<-c("MARCKSL1", "RGS13", "LMO2", "CCDC88A")  
cytotoxic<-c("GNLY", "NKG7", "GZMK", "CD8A")
memory_bcell<-c(    "FCRL4", "FCRL5", "PLAC8", "SOX5")
pc<-c("IGHG1", "IGHA1", "JCHAIN", "XBP1")
myeloid<-c("LYZ", "S100A8") 
poor_q_doublets <-c("FDCSP", "CLU", "CXCL13", "CR2")
doublet_proliferative_tcell<-c("MT2A", "CD3D", "TRAC", "PCNA")
Unk<-c("PTPRCAP", "CD37", "CD74")   
PDC<-c("PTCRA", "LILRA4", "IRF7")
markers_gg(naive_mem_bcell)
## [[1]]

## 
## [[2]]

markers_gg(cd4_tcell)
## [[1]]

## 
## [[2]]

markers_gg(dz_gc_bcell)
## [[1]]

## 
## [[2]]

markers_gg(lz_gc_bcell)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

markers_gg(cytotoxic)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Cytotoxic T cells are effector cells that destroy virus-infected cells, tumor cells, and tissue grafts that exist in the cytosol, or contiguous nuclear compartment. The cells are also known as CD8+ T

markers_gg(pc)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

markers_gg(myeloid)
## [[1]]

## 
## [[2]]

markers_gg(poor_q_doublets)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

markers_gg(doublet_proliferative_tcell)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Proliferating cell nuclear antigen (PCNA)

markers_gg(Unk)
## [[1]]

## 
## [[2]]

## 
## [[3]]

markers_gg(PDC)
## [[1]]

## 
## [[2]]

## 
## [[3]]

markers_gg("FOXO1")
## [[1]]

“NFKB1”: CLUSTER 2

markers_gg( "MYO1E")
## [[1]]

5.4 Doublet score

FeaturePlot(
    tonsil_wnn_without_doublet,
    features = "doublet_scores",
    reduction = "wnn.umap",
    pt.size = 0.1
  )

6 CellCycleScoring

s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

tonsil_wnn_without_doublet <- CellCycleScoring(tonsil_wnn_without_doublet, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
## Warning: The following features are not present in the object: MLF1IP, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: FAM64A, HN1, not
## searching for symbol synonyms
head(tonsil_wnn_without_doublet[[]])
##                                              lib_name_barcode    orig.ident
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 BCLL_15_T_1_AAACAGCCAGCAACCT-1 SeuratProject
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 BCLL_15_T_1_AAACAGCCAGCTTAGC-1 SeuratProject
## BCLL_15_T_1_AAACAGCCATTATGGT-1 BCLL_15_T_1_AAACAGCCATTATGGT-1 SeuratProject
## BCLL_15_T_1_AAACATGCAAATTGCT-1 BCLL_15_T_1_AAACATGCAAATTGCT-1 SeuratProject
## BCLL_15_T_1_AAACATGCAGGACACA-1 BCLL_15_T_1_AAACATGCAGGACACA-1 SeuratProject
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 BCLL_15_T_1_AAACATGCAGGCCAAA-1 SeuratProject
##                                nCount_RNA nFeature_RNA nCount_ATAC
## BCLL_15_T_1_AAACAGCCAGCAACCT-1       2938         1493       14475
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1       5693         2527       14100
## BCLL_15_T_1_AAACAGCCATTATGGT-1       2391         1408        6201
## BCLL_15_T_1_AAACATGCAAATTGCT-1       5899         2580        1984
## BCLL_15_T_1_AAACATGCAGGACACA-1       5427         2582       15009
## BCLL_15_T_1_AAACATGCAGGCCAAA-1       2377         1121       12678
##                                nFeature_ATAC nucleosome_signal
## BCLL_15_T_1_AAACAGCCAGCAACCT-1          6069         0.9178862
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1          5960         0.7073955
## BCLL_15_T_1_AAACAGCCATTATGGT-1          2759         0.7165354
## BCLL_15_T_1_AAACATGCAAATTGCT-1           967         0.7112069
## BCLL_15_T_1_AAACATGCAGGACACA-1          6360         0.8482014
## BCLL_15_T_1_AAACATGCAGGCCAAA-1          5233         0.5805921
##                                nucleosome_percentile TSS.enrichment
## BCLL_15_T_1_AAACAGCCAGCAACCT-1                  0.88       4.890692
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1                  0.44       3.685808
## BCLL_15_T_1_AAACAGCCATTATGGT-1                  0.47       6.287590
## BCLL_15_T_1_AAACATGCAAATTGCT-1                  0.45       7.088911
## BCLL_15_T_1_AAACATGCAGGACACA-1                  0.78       5.591753
## BCLL_15_T_1_AAACATGCAGGCCAAA-1                  0.15       5.826994
##                                TSS.percentile tss.level percent.mt percent_ribo
## BCLL_15_T_1_AAACAGCCAGCAACCT-1           0.31      High   9.326072     5.616065
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1           0.03      High   3.249605     4.180573
## BCLL_15_T_1_AAACAGCCATTATGGT-1           0.84      High   3.554998     9.912171
## BCLL_15_T_1_AAACATGCAAATTGCT-1           0.94      High   8.442109     7.119851
## BCLL_15_T_1_AAACATGCAGGACACA-1           0.65      High   6.467662     6.854616
## BCLL_15_T_1_AAACATGCAGGCCAAA-1           0.73      High  13.378208    19.099706
##                                nCount_peaks nFeature_peaks library_name
## BCLL_15_T_1_AAACAGCCAGCAACCT-1         7403           6067  BCLL_15_T_1
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1         7876           6443  BCLL_15_T_1
## BCLL_15_T_1_AAACAGCCATTATGGT-1         3011           2632  BCLL_15_T_1
## BCLL_15_T_1_AAACATGCAAATTGCT-1         1109           1053  BCLL_15_T_1
## BCLL_15_T_1_AAACATGCAGGACACA-1         7473           6202  BCLL_15_T_1
## BCLL_15_T_1_AAACATGCAGGCCAAA-1         5989           4857  BCLL_15_T_1
##                                 donor_id  sex age   age_group hospital    assay
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 BCLL-15-T male  33 young_adult     CIMA multiome
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 BCLL-15-T male  33 young_adult     CIMA multiome
## BCLL_15_T_1_AAACAGCCATTATGGT-1 BCLL-15-T male  33 young_adult     CIMA multiome
## BCLL_15_T_1_AAACATGCAAATTGCT-1 BCLL-15-T male  33 young_adult     CIMA multiome
## BCLL_15_T_1_AAACATGCAGGACACA-1 BCLL-15-T male  33 young_adult     CIMA multiome
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 BCLL-15-T male  33 young_adult     CIMA multiome
##                                          barcodes doublet_scores
## BCLL_15_T_1_AAACAGCCAGCAACCT-1 AAACAGCCAGCAACCT-1          0.020
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1 AAACAGCCAGCTTAGC-1          0.024
## BCLL_15_T_1_AAACAGCCATTATGGT-1 AAACAGCCATTATGGT-1          0.062
## BCLL_15_T_1_AAACATGCAAATTGCT-1 AAACATGCAAATTGCT-1          0.103
## BCLL_15_T_1_AAACATGCAGGACACA-1 AAACATGCAGGACACA-1          0.138
## BCLL_15_T_1_AAACATGCAGGCCAAA-1 AAACATGCAGGCCAAA-1          0.019
##                                predicted_doublets peaks.weight RNA.weight
## BCLL_15_T_1_AAACAGCCAGCAACCT-1              FALSE    0.4765878  0.5234122
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1              FALSE    0.4848371  0.5151629
## BCLL_15_T_1_AAACAGCCATTATGGT-1              FALSE    0.4136732  0.5863268
## BCLL_15_T_1_AAACATGCAAATTGCT-1              FALSE    0.5659107  0.4340893
## BCLL_15_T_1_AAACATGCAGGACACA-1              FALSE    0.3559236  0.6440764
## BCLL_15_T_1_AAACATGCAGGCCAAA-1              FALSE    0.5752736  0.4247264
##                                wsnn_res.0.005 wsnn_res.0.01 seurat_clusters
## BCLL_15_T_1_AAACAGCCAGCAACCT-1              0             0               0
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1              2             2               3
## BCLL_15_T_1_AAACAGCCATTATGGT-1              1             1               1
## BCLL_15_T_1_AAACATGCAAATTGCT-1              3             4               6
## BCLL_15_T_1_AAACATGCAGGACACA-1              1             1               4
## BCLL_15_T_1_AAACATGCAGGCCAAA-1              0             0               0
##                                sub.cluster_0.25 sub.cluster0_0.5 is_doublet
## BCLL_15_T_1_AAACAGCCAGCAACCT-1                0              0_4      FALSE
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1                2                2      FALSE
## BCLL_15_T_1_AAACAGCCATTATGGT-1              1_0                1      FALSE
## BCLL_15_T_1_AAACATGCAAATTGCT-1                4                4      FALSE
## BCLL_15_T_1_AAACATGCAGGACACA-1              1_6                1      FALSE
## BCLL_15_T_1_AAACATGCAGGCCAAA-1                0              0_0      FALSE
##                                wsnn_res.0.05 wsnn_res.0.75 wsnn_res.0.075
## BCLL_15_T_1_AAACAGCCAGCAACCT-1             0             1              0
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1             3             4              3
## BCLL_15_T_1_AAACAGCCATTATGGT-1             1             2              1
## BCLL_15_T_1_AAACATGCAAATTGCT-1             6            17              6
## BCLL_15_T_1_AAACATGCAGGACACA-1             4            15              4
## BCLL_15_T_1_AAACATGCAGGCCAAA-1             0             0              0
##                                    S.Score   G2M.Score Phase        old.ident
## BCLL_15_T_1_AAACAGCCAGCAACCT-1  0.04267816 -0.04808593     S        Naive/MBC
## BCLL_15_T_1_AAACAGCCAGCTTAGC-1  0.05184659 -0.15125381     S            GC/LZ
## BCLL_15_T_1_AAACAGCCATTATGGT-1 -0.09496973 -0.04192763    G1 Naive CD4 T-celL
## BCLL_15_T_1_AAACATGCAAATTGCT-1 -0.12384198 -0.08988897    G1        Monocytes
## BCLL_15_T_1_AAACATGCAGGACACA-1 -0.07576004 -0.15292531    G1        NK T-cell
## BCLL_15_T_1_AAACATGCAGGCCAAA-1  0.02528023 -0.04173396     S        Naive/MBC
print(x = tonsil_wnn_without_doublet[["pca"]], 
      dims = 1:10, 
      nfeatures = 5)
## PC_ 1 
## Positive:  MKI67, TOP2A, MYBL1, STMN1, AC023590.1 
## Negative:  BCL2, FYB1, TC2N, INPP4B, BCL11B 
## PC_ 2 
## Positive:  FYB1, INPP4B, BCL11B, TC2N, CD247 
## Negative:  TCF4, FCRL5, COL19A1, AUTS2, CD83 
## PC_ 3 
## Positive:  MAML3, AC104170.1, LHFPL2, FGD6, CCDC88A 
## Negative:  COL19A1, TOP2A, ASPM, UBE2C, DLGAP5 
## PC_ 4 
## Positive:  AC104170.1, RAPGEF5, FGD6, AC023590.1, AFF2 
## Negative:  PLXDC2, LRMDA, DOCK5, LYZ, CSF2RA 
## PC_ 5 
## Positive:  DERL3, MZB1, XBP1, FKBP11, CFAP54 
## Negative:  BACH2, SLC8A1, LPP, PLXDC2, LRMDA 
## PC_ 6 
## Positive:  KIF14, PLK1, CENPE, DEPDC1, HMMR 
## Negative:  MCM4, PCNA, DTL, HSP90AB1, CLSPN 
## PC_ 7 
## Positive:  NELL2, LINC00861, PDE3B, LEF1, IL7R 
## Negative:  DRAIC, KSR2, TOX2, ICA1, PTPN14 
## PC_ 8 
## Positive:  SPRR3, LCN2, S100A9, KRT13, MUC4 
## Negative:  MCTP1, TOX2, DRAIC, LYZ, SLC8A1 
## PC_ 9 
## Positive:  ACTG1, RPS26, CDC20, CFL1, TCL1A 
## Negative:  AC105402.3, BRIP1, FKBP5, PDE4D, HIPK2 
## PC_ 10 
## Positive:  BACH2, TCL1A, GAB1, IGHM, P2RY14 
## Negative:  FCRL4, DNAH8, AL355076.2, ATP8B4, MIR155HG

PCNA: Proliferating cell nuclear antigen

# Visualize the distribution of cell cycle markers across
RidgePlot(tonsil_wnn_without_doublet, features = c("PCNA", "TOP2A", "MCM6", "MKI67"), ncol = 2)
## Picking joint bandwidth of 0.0706
## Picking joint bandwidth of 0.056
## Picking joint bandwidth of 0.0482
## Picking joint bandwidth of 0.0468

tonsil_wnn_without_doublet <- RunPCA(tonsil_wnn_without_doublet, features = c(s.genes, g2m.genes))
## Warning in PrepDR(object = object, features = features, verbose = verbose): The
## following 30 features requested have not been scaled (running reduction without
## them): TYMS, MCM2, UNG, PRIM1, UHRF1, MLF1IP, RFC2, RPA2, RAD51AP1, SLBP, UBR7,
## POLD3, MSH2, RAD51, TIPIN, DSCC1, BLM, CASP8AP2, USP1, CHAF1B, FAM64A, HN1,
## RANGAP1, NCAPD2, PSRC1, LBR, CTCF, G2E3, CBX5, CENPA
## Warning in irlba(A = t(x = object), nv = npcs, ...): You're computing too large
## a percentage of total singular values, use a standard svd instead.
## PC_ 1 
## Positive:  MKI67, TOP2A, TPX2, NUSAP1, CENPE, GTSE1, CDK1, CENPF, AURKB, ANLN 
##     HMGB2, HMMR, BUB1, KIF11, DLGAP5, BIRC5, NDC80, UBE2C, CDCA2, TUBB4B 
##     RRM2, KIF23, CDCA3, ECT2, CDCA8, CKAP2L, KIF2C, CCNB2, TTK, HJURP 
## Negative:  POLA1, CCNE2, MCM5, MCM6, CDC6, NASP, EXO1, GINS2, GAS2L3, DTL 
##     CDC45, ATAD2, CKAP2, TMPO, HELLS, WDR76, RRM1, CKAP5, ANP32E, GMNN 
##     FEN1, MCM4, KIF20B, PCNA, NEK2, BRIP1, TACC3, E2F8, CDCA7, CLSPN 
## PC_ 2 
## Positive:  MCM4, CLSPN, DTL, HELLS, CDC45, PCNA, GINS2, CDC6, BRIP1, WDR76 
##     EXO1, MCM6, POLA1, ATAD2, FEN1, CCNE2, MCM5, RRM2, E2F8, GMNN 
##     NASP, RRM1, CDCA7, SMC4, TMPO, HMGB2, ANP32E, KIF11, NUSAP1, MKI67 
## Negative:  AURKA, GAS2L3, CDC20, HMMR, CENPE, UBE2C, NEK2, DLGAP5, CENPF, CCNB2 
##     KIF23, CDCA8, TPX2, CDCA3, BUB1, TTK, TOP2A, HJURP, CKAP2L, GTSE1 
##     CKS2, CKAP2, CDC25C, ECT2, NUF2, KIF2C, CKAP5, TUBB4B, BIRC5, AURKB 
## PC_ 3 
## Positive:  ANLN, E2F8, CDC25C, RRM2, NDC80, KIF11, TMPO, ECT2, CKAP2L, HJURP 
##     BRIP1, KIF23, CDCA2, EXO1, GTSE1, TTK, ATAD2, CDK1, BUB1, CKAP5 
##     MKI67, SMC4, KIF20B, POLA1, RRM1, KIF2C, NUSAP1, TACC3, TOP2A, NUF2 
## Negative:  CDC20, CCNB2, CKS2, CKS1B, NEK2, BIRC5, GINS2, NASP, HMGB2, TUBB4B 
##     DTL, ANP32E, MCM5, MCM4, MCM6, CDCA7, CENPF, AURKA, CDC6, CDCA3 
##     UBE2C, GMNN, HMMR, PCNA, CDC45, CDCA8, TPX2, DLGAP5, GAS2L3, FEN1 
## PC_ 4 
## Positive:  FEN1, RRM1, E2F8, ANP32E, PCNA, CKS1B, RRM2, AURKB, CDCA3, TUBB4B 
##     CKS2, NASP, HMGB2, GMNN, BIRC5, UBE2C, NDC80, CDK1, MKI67, TACC3 
##     GTSE1, KIF2C, NUSAP1, HJURP, TOP2A, CKAP2L, CLSPN, ANLN, CDCA8, GINS2 
## Negative:  GAS2L3, POLA1, DTL, CKAP2, KIF20B, CKAP5, NEK2, MCM6, BRIP1, HELLS 
##     CDC45, EXO1, ECT2, CDCA7, TTK, CDC6, AURKA, CENPE, CCNB2, HMMR 
##     CENPF, CDCA2, WDR76, ATAD2, CDC20, SMC4, CCNE2, DLGAP5, MCM4, NUF2 
## PC_ 5 
## Positive:  MCM5, NASP, KIF20B, CKAP5, CKAP2, ANP32E, GINS2, FEN1, CDC25C, TTK 
##     KIF11, CDCA7, POLA1, ANLN, MCM6, CDCA2, PCNA, BUB1, RRM1, HJURP 
##     KIF2C, MCM4, HELLS, KIF23, NDC80, SMC4, ECT2, CKS2, CDK1, CKAP2L 
## Negative:  CCNE2, TACC3, CDC6, NEK2, DTL, EXO1, CDC20, CCNB2, BIRC5, CDC45 
##     TMPO, ATAD2, HMGB2, CLSPN, CDCA3, CENPF, E2F8, WDR76, BRIP1, TUBB4B 
##     HMMR, CKS1B, RRM2, GAS2L3, AURKB, TOP2A, DLGAP5, NUF2, MKI67, AURKA
tonsil_wnn_without_doublet <- RunUMAP(object = tonsil_wnn_without_doublet,
  nn.name = "weighted.nn",
  reduction.name = "wnn.umap",
  reduction.key = "wnnUMAP_" )
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 17:16:13 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:16:16 Commencing smooth kNN distance calibration using 1 thread
## 17:16:19 Initializing from normalized Laplacian + noise
## 17:16:23 Commencing optimization for 200 epochs, with 2156298 positive edges
## 17:17:07 Optimization finished
DimPlot(tonsil_wnn_without_doublet,
    reduction = "wnn.umap",
    pt.size = 0.1, label = T, split.by = "age_group")

DimPlot(tonsil_wnn_without_doublet,
    reduction = "wnn.umap",
    pt.size = 0.1, label = T)